Redact(string,string,IList<PDFRedact>) Method

Summary

Redacts a PDF document stored in a file.

Syntax

public static void Redact( 
   string fileName, 
   string password, 
   IList<PDFRedact> redacts 
)

Parameters

fileName

Name of the file containing an existing PDF document to be redacted.

password

The password to use if fileName contains an encrypted PDF file.

redacts

One or more PDF redact objects.

Remarks

Redaction can be used to remove sensitive information from an existing PDF document.

This method quickly redacts an existing PDF document in place by removing any character, image, or shape that intersects with any of the PDFRedact.Bounds of redacts. The resulting PDF is not re-generated and therefore will maintain the same exact compression, metadata, fonts and any other resources.

Use the following code to redact all data in a PDF page in the area from (0, 0) to (100, 100):

// Create a PDF redaction object 
var redact = new PDFRedact(0, 0, 100, 100); 
// Redact the file in place. The second argument is the password; pass null when the file is not encrypted: 
PDFFile.Redact(pdfFileName, null, new List<PDFRedact> { redact });

Example

This example parses the text of a PDF file, finds the locations of all items containing the word "LEADTOOLS", and redacts them.

using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Controls; 
using Leadtools.Drawing; 
using Leadtools.ImageProcessing; 
using Leadtools.Pdf; 
using Leadtools.Svg; 
using Leadtools.WinForms; 
 
 
// Copies the sample PDF, collects every word that contains "LEADTOOLS" (case-insensitively), 
// redacts those words in the copy, and then re-parses the copy to check that no such word remains. 
private static void RedactExample() 
{ 
   const string toRedact = "LEADTOOLS"; 
   string toRedactLower = toRedact.ToLowerInvariant(); 

   // Work on a copy of 'leadtools.pdf' installed with LEADTOOLS, since Redact changes the file it is given 
   string imagesDir = @"C:\LEADTOOLS21\Resources\Images"; 
   string sourceFileName = Path.Combine(imagesDir, "leadtools.pdf"); 
   string pdfFileName = Path.Combine(imagesDir, "leadtools-redacted.pdf"); 
   File.Copy(sourceFileName, pdfFileName, true); 

   // Step 1: parse the objects of every page and group them into words. 
   // The document is disposed at the end of this scope, before the file is redacted. 
   var allWords = new List<MyPDFWord>(); 
   using (var sourceDocument = new PDFDocument(pdfFileName)) 
   { 
      sourceDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1); 

      foreach (PDFDocumentPage pdfPage in sourceDocument.Pages) 
      { 
         allWords.AddRange(GetPageWords(pdfPage)); 
      } 
   } 

   // Step 2: describe one redaction per word that contains the search value 
   var pdfRedacts = new List<PDFRedact>(); 
   foreach (MyPDFWord word in allWords) 
   { 
      if (!word.Value.ToLowerInvariant().Contains(toRedactLower)) 
         continue; 

      Console.WriteLine($"Found {word.Value} at {word.Bounds} in page {word.PageNumber}"); 

      LeadRectD bounds = word.Bounds; 
      pdfRedacts.Add(new PDFRedact 
      { 
         PageNumber = word.PageNumber, 
         Bounds = new PDFRect(bounds.Left, bounds.Top, bounds.Right, bounds.Bottom) 
      }); 
   } 

   // Step 3: redact the document (null password) 
   PDFFile.Redact(pdfFileName, null, pdfRedacts); 

   // Step 4: parse the redacted file again and check that the redacted words are gone 
   using (var redactedDocument = new PDFDocument(pdfFileName)) 
   { 
      redactedDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1); 

      foreach (PDFDocumentPage pdfPage in redactedDocument.Pages) 
      { 
         foreach (MyPDFWord word in GetPageWords(pdfPage)) 
         { 
            Debug.Assert(!word.Value.ToLowerInvariant().Contains(toRedactLower)); 
         } 
      } 
   } 
} 
 
// A single word found on a PDF page, as produced by GetPageWords 
class MyPDFWord 
{ 
   // The characters of the word 
   public string Value; 
   // Number of the page that holds the word 
   public int PageNumber; 
   // Bounding rectangle of the word; GetPageWords stores the union of the character 
   // rectangles after flipping their Y values against the page height 
   public LeadRectD Bounds; 
} 
 
// Groups the objects of a parsed PDF page into words. 
// 
// pdfPage: a page whose Objects list has been populated (the callers parse with PDFParsePagesOptions.Objects). 
// Returns: one MyPDFWord per run of objects that contains at least one text object; an empty list 
//          when the page has no objects. The returned list is never null. 
// 
// A run ends after an object whose TextProperties flags end-of-word or end-of-line, or at the last object. 
// The text and the bounding rectangle of a word are accumulated in the same pass, so both always describe 
// exactly the same set of text objects, and a non-text object inside or at the end of a run never causes 
// text that was already collected to be dropped. 
private static IList<MyPDFWord> GetPageWords(PDFDocumentPage pdfPage) 
{ 
   var words = new List<MyPDFWord>(); 

   IList<PDFObject> objects = pdfPage.Objects; 
   if (objects == null || objects.Count == 0) 
      return words; 

   int objectCount = objects.Count; 
   double pageHeight = pdfPage.Height; 
   int objectIndex = 0; 

   // Each pass of the outer loop consumes one run of objects 
   while (objectIndex < objectCount) 
   { 
      LeadRectD wordBounds = LeadRectD.Empty; 
      var wordText = new StringBuilder(); 
      bool hasText = false; 

      // Loop till we reach EndOfWord/EndOfLine or reach the end of the objects 
      bool more = true; 
      while (more) 
      { 
         PDFObject pdfObject = objects[objectIndex]; 

         // Only text objects contribute to the word; anything else is stepped over 
         if (pdfObject.ObjectType == PDFObjectType.Text) 
         { 
            PDFRect pdfBounds = pdfObject.Bounds; 

            // pdfBounds are in bottom-left coordinate, convert it to top-left 
            LeadRectD objectBounds = LeadRectD.FromLTRB(pdfBounds.Left, pageHeight - pdfBounds.Top, pdfBounds.Right, pageHeight - pdfBounds.Bottom); 

            // Grow the bounding rectangle of the word by this character 
            if (wordBounds.IsEmpty) 
               wordBounds = objectBounds; 
            else 
               wordBounds = LeadRectD.UnionRects(wordBounds, objectBounds); 

            wordText.Append(pdfObject.Code); 
            hasText = true; 
         } 

         objectIndex++; 
         more = (objectIndex < objectCount) && !pdfObject.TextProperties.IsEndOfWord && !pdfObject.TextProperties.IsEndOfLine; 
      } 

      // A run made only of non-text objects produces no word 
      if (!hasText) 
         continue; 

      // Add this word to the list 
      var word = new MyPDFWord(); 
      word.PageNumber = pdfPage.PageNumber; 
      word.Value = wordText.ToString(); 
      word.Bounds = wordBounds; 
      words.Add(word); 
   } 

   return words; 
}

Imports Leadtools 
Imports Leadtools.Codecs 
Imports Leadtools.Pdf 
Imports Leadtools.WinForms 
Imports Leadtools.Svg 
Imports Leadtools.ImageProcessing 
 
' Copies the sample PDF, collects every word that contains "LEADTOOLS" (case-insensitively), 
' redacts those words in the copy, and then re-parses the copy to check that no such word remains. 
Private Shared Sub RedactExample() 
   Const toRedact As String = "LEADTOOLS" 
   Dim toRedactLower As String = toRedact.ToLowerInvariant() 

   ' Work on a copy of 'leadtools.pdf' installed with LEADTOOLS, since Redact changes the file it is given 
   Dim imagesDir As String = "C:\LEADTOOLS21\Resources\Images" 
   Dim sourceFileName As String = Path.Combine(imagesDir, "leadtools.pdf") 
   Dim pdfFileName As String = Path.Combine(imagesDir, "leadtools-redacted.pdf") 
   File.Copy(sourceFileName, pdfFileName, True) 

   ' Step 1: parse the objects of every page and group them into words. 
   ' The document is disposed at End Using, before the file is redacted. 
   Dim allWords As New List(Of MyPDFWord)() 
   Using sourceDocument As New PDFDocument(pdfFileName) 
      sourceDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1) 

      For Each pdfPage As PDFDocumentPage In sourceDocument.Pages 
         allWords.AddRange(GetPageWords(pdfPage)) 
      Next 
   End Using 

   ' Step 2: describe one redaction per word that contains the search value 
   Dim pdfRedacts As New List(Of PDFRedact)() 
   For Each word As MyPDFWord In allWords 
      If Not word.Value.ToLowerInvariant().Contains(toRedactLower) Then 
         Continue For 
      End If 

      Console.WriteLine($"Found {word.Value} at {word.Bounds} in page {word.PageNumber}") 

      Dim bounds As LeadRectD = word.Bounds 
      Dim redact As New PDFRedact() With { 
         .PageNumber = word.PageNumber, 
         .Bounds = New PDFRect(bounds.Left, bounds.Top, bounds.Right, bounds.Bottom) 
      } 
      pdfRedacts.Add(redact) 
   Next 

   ' Step 3: redact the document (no password) 
   PDFFile.Redact(pdfFileName, Nothing, pdfRedacts) 

   ' Step 4: parse the redacted file again and check that the redacted words are gone 
   Using redactedDocument As New PDFDocument(pdfFileName) 
      redactedDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1) 

      For Each pdfPage As PDFDocumentPage In redactedDocument.Pages 
         For Each word As MyPDFWord In GetPageWords(pdfPage) 
            Debug.Assert(Not word.Value.ToLowerInvariant().Contains(toRedactLower)) 
         Next 
      Next 
   End Using 
End Sub 
 
' A single word found on a PDF page, as produced by GetPageWords 
Class MyPDFWord 
   ' The characters of the word 
   Public Value As String 
   ' Number of the page that holds the word 
   Public PageNumber As Integer 
   ' Bounding rectangle of the word; GetPageWords stores the union of the character 
   ' rectangles after flipping their Y values against the page height 
   Public Bounds As LeadRectD 
End Class 
 
' Groups the objects of a parsed PDF page into words. 
' 
' pdfPage: a page whose Objects list has been populated (the callers parse with PDFParsePagesOptions.Objects). 
' Returns: one MyPDFWord per run of objects that contains at least one text object; an empty list 
'          when the page has no objects. The returned list is never Nothing. 
' 
' A run ends after an object whose TextProperties flags end-of-word or end-of-line, or at the last object. 
' The text and the bounding rectangle of a word are accumulated in the same pass, so both always describe 
' exactly the same set of text objects, and a non-text object inside or at the end of a run never causes 
' text that was already collected to be dropped. 
Private Shared Function GetPageWords(pdfPage As PDFDocumentPage) As IList(Of MyPDFWord) 
   Dim words As New List(Of MyPDFWord)() 

   Dim objects As IList(Of PDFObject) = pdfPage.Objects 
   If objects Is Nothing OrElse objects.Count = 0 Then 
      Return words 
   End If 

   Dim objectCount As Integer = objects.Count 
   Dim pageHeight As Double = pdfPage.Height 
   Dim objectIndex As Integer = 0 

   ' Each pass of the outer loop consumes one run of objects 
   While objectIndex < objectCount 
      Dim wordBounds As LeadRectD = LeadRectD.Empty 
      Dim wordText As New StringBuilder() 
      Dim hasText As Boolean = False 

      ' Loop till we reach EndOfWord/EndOfLine or reach the end of the objects 
      Dim more As Boolean = True 
      While more 
         Dim pdfObject As PDFObject = objects(objectIndex) 

         ' Only text objects contribute to the word; anything else is stepped over 
         If pdfObject.ObjectType = PDFObjectType.Text Then 
            Dim pdfBounds As PDFRect = pdfObject.Bounds 

            ' pdfBounds are in bottom-left coordinate, convert it to top-left 
            Dim objectBounds As LeadRectD = LeadRectD.FromLTRB(pdfBounds.Left, pageHeight - pdfBounds.Top, pdfBounds.Right, pageHeight - pdfBounds.Bottom) 

            ' Grow the bounding rectangle of the word by this character 
            If wordBounds.IsEmpty Then 
               wordBounds = objectBounds 
            Else 
               wordBounds = LeadRectD.UnionRects(wordBounds, objectBounds) 
            End If 

            wordText.Append(pdfObject.Code) 
            hasText = True 
         End If 

         objectIndex += 1 
         more = (objectIndex < objectCount) AndAlso Not pdfObject.TextProperties.IsEndOfWord AndAlso Not pdfObject.TextProperties.IsEndOfLine 
      End While 

      ' A run made only of non-text objects produces no word 
      If Not hasText Then 
         Continue While 
      End If 

      ' Add this word to the list 
      Dim word As New MyPDFWord() 
      word.PageNumber = pdfPage.PageNumber 
      word.Value = wordText.ToString() 
      word.Bounds = wordBounds 
      words.Add(word) 
   End While 

   Return words 
End Function

Requirements

Target Platforms

Reference

PDFFile Class

PDFFile Members

Leadtools.Pdf Namespace

Download our FREE evaluation

Help Version 21.0.2021.7.6

Leadtools.Pdf Assembly

Introduction

Getting Started

Namespaces

Leadtools.Pdf Namespace

Assemblies